* Project root; every data and output path below is built from this global.
global g_root "C:\Users\Hongye Guo\Dropbox (Personal)\Density\Code\Production\Submission"

* Load the combined time-series input; varn(1) takes variable names from row 1.
* Forward slashes are used after $g_root, as in the Table A2 merges and the
* esttab export further down; Stata accepts "/" as a separator on all platforms.
import delimited using "$g_root/Data/TSImplementationCombined.csv", varn(1) clear
rename p price
rename d dvd
rename e earnings

* Backstop crsp return with gfd return: ret_gfd fills in wherever vwretd is missing
gen ret = vwretd
replace ret = ret_gfd if mi(ret)
* Dividend-price and earnings-price ratios
gen dp = dvd/price
gen ep = earnings/price

* Ts var: date is decoded as year*100 + month and converted to a Stata monthly date
gen year = floor(date/100)
gen month = date - 100 * year
gen ym = ym(year, month)
sort ym
* Return on the next row in ym order.
* NOTE(review): this equals next month's return only if ym has no gaps or
* duplicates -- confirm against the input file.
gen ret_f1m = ret[_n+1]

* Payout ratio (de) and roe: expanding-window means in the current row order.
* sum() is a running total that counts missing values as zero, so the running
* total is divided by the running number of non-missing observations.
* The total is stored first and divided afterwards (two steps) on purpose.
foreach l_v in de roe {
	tempvar l_n
	generate `l_n' = sum(!missing(`l_v'))
	generate `l_v'_expw_mean = sum(`l_v')
	replace `l_v'_expw_mean = `l_v'_expw_mean / `l_n'
	drop `l_n'
}

* Expanding-window mean of ret in the current row order.
* ln_ret is kept in the dataset; the denominator is the running count of rows
* on which ln(1 + ret) is non-missing.
generate ln_ret = ln(1 + ret)
generate ret_expw_mean = sum(ret)
tempvar l_n_valid
generate `l_n_valid' = sum(!missing(ln_ret))
replace ret_expw_mean = ret_expw_mean / `l_n_valid'
drop `l_n_valid'

* Benchmark forecasts in Campbell Thompson 2008, each divided by 12
* (presumably an annual-to-monthly conversion -- confirm).
* l_growth is the term shared by the dp and ep forecasts. It is substituted as
* text, so every expression is evaluated exactly as if written out in full.
local l_growth "(1-de_expw_mean)*roe_expw_mean"
generate fc_dp = (dp + `l_growth')/12
generate fc_ep = (ep * de_expw_mean + `l_growth')/12
generate fc_bm = roe_expw_mean*(1+de_expw_mean*(bm-1))/12
* Simple average of the three forecasts
generate fc_cbd = (fc_dp + fc_ep + fc_bm)/3

* Generate signal
* month is redefined as the calendar month of the following period (ym + 1);
* am1_flag is 1 when that month is January, April, July or October.
drop month
generate month = month(dofm(ym+1))
generate am1_flag = mod(month, 3) == 1

* Use the backstopped version of return: it takes over the name vwretd
drop vwretd
rename ret vwretd
sort ym
* Row-offset lags 1..12, a one-row lead, and the contemporaneous value as lag 0
forvalues l_k = 1/12 {
	generate vwretd_l`l_k'm = vwretd[_n-`l_k']
}
generate vwretd_f1m = vwretd[_n+1]

generate vwretd_l0m = vwretd

* For quarter lag q the monthly lag is chosen among 3q-3, 3q-2, 3q-1 according
* to mod(month,3): remainder 1 -> 3q-1, remainder 0 -> 3q-2, otherwise 3q-3.
forvalues l_q = 1/4 {
	local l_base = 3*(`l_q'-1)
	local l_mid = `l_base' + 1
	local l_far = `l_base' + 2
	generate vwretd_prevq1_l`l_q'q = cond(mod(month,3)==1, vwretd_l`l_far'm, cond(mod(month,3)==0, vwretd_l`l_mid'm, vwretd_l`l_base'm))
}

* Sum of the four quarter-lag returns, and its interaction with am1_flag
generate vwretd_prevq1_t4q = vwretd_prevq1_l1q + vwretd_prevq1_l2q + vwretd_prevq1_l3q + vwretd_prevq1_l4q
generate vwretd_prevq1_t4q_xam = vwretd_prevq1_t4q * am1_flag

* Raw signal is simply the past 4 newsy month returns,
* with the sign flipped on rows where am1_flag is set (the dv month is newsy)
generate signal = cond(am1_flag, -vwretd_prevq1_t4q, vwretd_prevq1_t4q)

* Expanding-window means, stored with a one-period lag: the row with
* ym == t + 1 receives the mean over all rows with ym <= t.
* The loop runs over the ym range on which both signal and vwretd_f1m exist.
sum ym if !mi(signal) & !mi(vwretd_f1m)
local l_min = `r(min)' 
local l_max = `r(max)'

gen ret_expw_mean_am = .
gen ret_expw_mean_nam = .
gen vwretd_prevq1_t4q_expw_mean = .

forvalues l_ym = `l_min'/`l_max'{
	* r(mean) is referenced directly instead of through `r(mean)' expansion.
	* If a subsample is empty the result is then missing rather than the
	* syntax error produced by "replace x =  if ...", and the value is not
	* passed through a string representation.

	* Mean of vwretd_f1m over rows with am1_flag set
	qui sum vwretd_f1m if ym<=`l_ym' & am1_flag
	replace ret_expw_mean_am = r(mean) if ym == `l_ym' + 1
	
	* Mean of vwretd_f1m over rows with am1_flag not set
	qui sum vwretd_f1m if ym<=`l_ym' & !am1_flag
	replace ret_expw_mean_nam = r(mean) if ym == `l_ym' + 1
	
	* Mean of the unsigned four-quarter sum, used later for demeaning
	qui sum vwretd_prevq1_t4q if ym<=`l_ym' 
	replace vwretd_prevq1_t4q_expw_mean = r(mean) if ym == `l_ym' + 1
} 

* Demean with expanding window mean, then apply the same sign flip as signal
generate vwretd_prevq1_t4q_dm = vwretd_prevq1_t4q - vwretd_prevq1_t4q_expw_mean
generate signal_dm = cond(am1_flag, -vwretd_prevq1_t4q_dm, vwretd_prevq1_t4q_dm)

* Various versions of estimation method.
* expw_b_<sfx> holds the slope and expw_c_<sfx> the constant, where <sfx> is:
*   dm      demeaned signal, all rows
*   am      raw signal, rows with am1_flag set
*   nam     raw signal, rows with am1_flag not set
*   am_dm   demeaned signal, rows with am1_flag set
*   nam_dm  demeaned signal, rows with am1_flag not set
foreach l_sfx in dm am nam am_dm nam_dm {
	generate expw_b_`l_sfx' = .
	generate expw_c_`l_sfx' = .
}

* Start 5 periods after the first row that has both signal and vwretd_f1m
summarize ym if !missing(signal) & !missing(vwretd_f1m)
local l_min = `r(min)' + 5
local l_max = `r(max)'

* Expanding-window OLS: estimates from rows with ym <= t are written to the
* row with ym == t + 1.
forvalues l_ym = `l_min'/`l_max' {
	local l_next = `l_ym' + 1
	foreach l_sfx in am nam dm am_dm nam_dm {
		* Regressor: raw signal for am/nam, demeaned signal otherwise
		if inlist("`l_sfx'", "am", "nam") {
			local l_x signal
		}
		else {
			local l_x signal_dm
		}
		* Extra sample restriction implied by the suffix (none for dm)
		local l_sub
		if inlist("`l_sfx'", "am", "am_dm") {
			local l_sub "& am1_flag"
		}
		else if inlist("`l_sfx'", "nam", "nam_dm") {
			local l_sub "& !am1_flag"
		}
		quietly regress vwretd_f1m `l_x' if ym<=`l_ym' `l_sub'
		* e(b) is ordered (slope, constant)
		matrix B = e(b)
		replace expw_b_`l_sfx' = B[1,1] if ym == `l_next'
		replace expw_c_`l_sfx' = B[1,2] if ym == `l_next'
	}
}
* Snapshot reloaded by each table section below
tempfile tf_main
save `tf_main', replace

* Table 4
* R2 of each forecast of vwretd_f1m, measured against the expanding-window
* mean ret_expw_mean: r2 = 1 - sum((y - fc)^2) / sum((y - ret_expw_mean)^2).
{
	use `tf_main', clear

	* Pooled slope on the demeaned signal with three alternative intercepts
	generate fc_1 = expw_b_dm * signal_dm + expw_c_dm
	generate fc_2 = expw_b_dm * signal_dm + ret_expw_mean
	generate fc_3 = expw_b_dm * signal_dm + fc_cbd

	* Subsample coefficients, chosen per row by am1_flag
	generate fc_4 = cond(am1_flag, expw_b_am_dm * signal_dm + expw_c_am_dm, expw_b_nam_dm * signal_dm + expw_c_nam_dm)
	generate fc_5 = cond(am1_flag, expw_b_am_dm * signal_dm + ret_expw_mean_am, expw_b_nam_dm * signal_dm + ret_expw_mean_nam)
	generate fc_6 = cond(am1_flag, expw_b_am_dm * signal_dm + fc_cbd, expw_b_nam_dm * signal_dm + fc_cbd)
	* Same as fc_4 but with the raw (not demeaned) signal and its coefficients
	generate fc_7 = cond(am1_flag, expw_b_am * signal + expw_c_am, expw_b_nam * signal + expw_c_nam)

	* Evaluation sample: from 1926m1, with fc_1-fc_3, fc_cbd and the outcome present
	generate common_flag = !missing(fc_1, fc_2, fc_3, fc_cbd, vwretd_f1m) & ym>=ym(1926, 1)

	* Forecast behind each squared-error column: fc_1..fc_7, then the two benchmarks
	forvalues l_i = 1/7 {
		local l_fc_`l_i' fc_`l_i'
	}
	local l_fc_8 fc_cbd
	local l_fc_9 ret_expw_mean
	forvalues l_i = 1/9 {
		generate res_`l_i' = (vwretd_f1m - `l_fc_`l_i'')^2 if common_flag
	}

	* Denominator: squared error of ret_expw_mean (res_9 duplicates this series)
	generate var_sqr = (vwretd_f1m - ret_expw_mean)^2 if common_flag
	egen tss = total(var_sqr)
	forvalues l_i = 1/9 {
		egen rss_`l_i' = total(res_`l_i')
		generate r2_`l_i' = 1 - rss_`l_i'/tss
	}

	* The r2_* values are constant across rows, so one row carries everything
	summarize r2_*
	keep in 1
	keep r2_*

	* Relabel into the column order of the table; r2_9 keeps its name
	rename (r2_8 r2_7 r2_4 r2_5 r2_6 r2_1 r2_2 r2_3) (rr2_0 rr2_1 rr2_2 rr2_3 rr2_4 rr2_5 rr2_6 rr2_7)
	order rr2_0 rr2_1 rr2_2 rr2_3 rr2_4 rr2_5 rr2_6 rr2_7 
}

* Table A2
* Data in other_sig and merged_signals are pseudo data
{
	* ---- Settings for the out-of-sample columns (2-5) ----
	* Number of periods added to a signal's first usable ym before the first
	* expanding-window regression. 120 gives Column 2; use 60 for the 5 year
	* results (Column 3).
	local l_burn_in = 120
	* 1 = zero out slopes whose sign differs from the in-sample slope and floor
	*     the forecast at rfree;
	* 0 = apply neither restriction (Columns 4 & 5).
	local l_restrict = 1

	use `tf_main', clear
	merge 1:1 ym using  "$g_root/Data/other_sig_pseudo.dta", keep(1 3) nogen
	merge 1:1 ym using  "$g_root/Data/merged_signals_pseudo.dta", keep(1 3) nogen

	* 2 versions of vrp: use the cm one
	sum vrp vrp_cm if !mi(vrp)
	replace vrp = vrp_cm 

	local l_sig_list dy ep bm svar csp ntis eqis tbl lty dfy dfr infl ik cay tms aem hkm sii kp svix gm_expg_rt vrp sent_orth 

	* Full-sample predictive regression of vwretd_f1m on each signal (log only)
	foreach l_var in `l_sig_list'{
		reg vwretd_f1m `l_var', robust
	}

	* Coverage of each signal: ym_<sig> equals ym on rows where the signal exists.
	* Formatting and the summary run once, after all ym_* variables are created.
	foreach l_var in `l_sig_list'{
		gen ym_`l_var' = ym if !mi(`l_var')
	}
	format ym_* %tm
	sum ym_*, f

	* Expanding-window demeaning: <sig>_dm = <sig> minus its running mean
	* (running sum over running count of non-missing values, in ym order).
	* NOTE(review): tf_main has no variable named exactly ret (it was renamed to
	* vwretd before the save), so the ret entry relies on one of the merged files
	* supplying ret; otherwise Stata stops on an ambiguous abbreviation -- confirm.
	sort ym
	foreach l_sig in `l_sig_list' ret{
		gen test_`l_sig' = sum(`l_sig')
		gen count_`l_sig' = sum(!mi(`l_sig'))
		gen expm_`l_sig' = test_`l_sig'/count_`l_sig'
		drop test_`l_sig'
		gen `l_sig'_dm = `l_sig'-expm_`l_sig' 
	}

	* Demeaned signals with missing values set to 0.
	* These variables and l_list are not used again within this section.
	foreach l_sig in `l_sig_list'{
		gen `l_sig'_dm_filled = `l_sig'_dm 
		replace `l_sig'_dm_filled = 0 if mi(`l_sig'_dm)
	}

	local l_list
	foreach l_sig in `l_sig_list'{
		local l_list `l_list' `l_sig'_dm_filled
	}

	* Column 1: In sample
	* b_<sig>_is keeps the in-sample slope (used for the sign restriction below);
	* R2 is measured against ret_expw_mean on the same rows.
	foreach l_sig in `l_sig_list'{
		gen temp_common_flag = !mi(`l_sig') & !mi(fc_cbd) & !mi(vwretd_f1m) & ym>=ym(1926, 1) & ym<ym(2019, 12)	
		
		qui reg vwretd_f1m `l_sig' if temp_common_flag 
		mat B = e(b)
		gen b_`l_sig'_is = B[1,1] 
		predict fc_`l_sig'_is 
		
		gen temp_res = (vwretd_f1m - fc_`l_sig'_is)^2 if temp_common_flag 
		
		gen temp_var_sqr = (vwretd_f1m - ret_expw_mean)^2 if temp_common_flag 
		egen temp_tss = total(temp_var_sqr)
		egen temp_rss = total(temp_res)
		gen r2_is_`l_sig' = 1 - temp_rss/temp_tss if !mi(fc_`l_sig'_is)
		drop temp_*
	}
	sum r2_*
	drop r2_*

	* Column 2-5 OOS
	foreach l_sig in `l_sig_list'{

		gen temp_expw_b_dm = .

		sum ym if !mi(`l_sig') & !mi(vwretd_f1m)
		local l_min = `r(min)' + `l_burn_in'
		local l_max = `r(max)'

		* Expanding-window slope: estimated on rows with ym <= t and stored on
		* the row with ym == t + 1. Only the slope is kept; the forecast below
		* adds ret_expw_mean rather than the fitted constant.
		forvalues l_ym = `l_min'/`l_max'{
			qui reg vwretd_f1m `l_sig'_dm if ym<=`l_ym'
			mat B = e(b)
			replace temp_expw_b_dm = B[1,1] if ym == `l_ym' + 1
		} 
		* Sign restriction, applied once to all rows after the loop (equivalent
		* to repeating it in every iteration, since it is idempotent).
		if `l_restrict' {
			replace temp_expw_b_dm  = 0 if temp_expw_b_dm*b_`l_sig'_is  < 0
		}
		gen fc_`l_sig'_oos_dm = temp_expw_b_dm * `l_sig'_dm + ret_expw_mean
		* Floor the forecast at rfree.
		* NOTE(review): where rfree is missing the comparison is true for any
		* non-missing forecast (missing sorts above every number), so the forecast
		* becomes missing and the row leaves the R2 sample -- confirm this is intended.
		if `l_restrict' {
			replace fc_`l_sig'_oos_dm  = rfree if fc_`l_sig'_oos_dm <rfree
		}
		drop temp_expw_b_dm
		
		gen temp_common_flag = !mi(fc_`l_sig'_oos_dm) & !mi(fc_cbd) & !mi(vwretd_f1m) & ym>=ym(1926, 1)	& ym<ym(2019, 12)	
		gen temp_res = (vwretd_f1m - fc_`l_sig'_oos_dm)^2 if temp_common_flag 
		
		gen temp_var_sqr = (vwretd_f1m - ret_expw_mean)^2 if temp_common_flag 
		egen temp_tss = total(temp_var_sqr)
		egen temp_rss = total(temp_res)
		gen r2_`l_sig' = 1 - temp_rss/temp_tss if !mi(fc_`l_sig'_oos_dm)
		drop temp_*
	}
	sum r2_*
}

* Table A3
* Builds a timing portfolio from the pooled forecast and regresses its return
* on next-period factor returns.
{
	use `tf_main', clear
	* Forward slashes after $g_root, as in the other merge and export paths
	merge 1:1 ym using "$g_root/Data/basic_factor_202106.dta", keep(1 3) nogen

	* Pooled-slope forecast, its signal-only part, and the outcome net of the
	* expanding-window mean
	gen fc = expw_b_dm * signal_dm + ret_expw_mean
	gen fc_dm = expw_b_dm * signal_dm
	gen vwretd_f1m_dm = vwretd_f1m - ret_expw_mean

	* Figure 4: forecast (times 100) plotted against rf from 1926m11 onwards
	gen fc_pct = fc*100
	tw line fc_pct ym if ym>=ym(1926,11) || line rf ym if ym>=ym(1926,11)
	
	* Portfolio weight = forecast minus expanding-window mean; the portfolio
	* return is then rescaled so its standard deviation equals that of mktrf
	* (both standard deviations taken over all non-missing rows).
	sum mktrf
	local l_sd = `r(sd)'
	gen pf_wgt = fc - ret_expw_mean
	gen pf_ret = pf_wgt * vwretd_f1m
	sum pf_ret
	replace pf_ret = pf_ret/`r(sd)'*`l_sd'

	* One-period leads of the factors, to line up with vwretd_f1m
	tsset ym
	foreach l_signal in mktrf hml smb mom {
		gen `l_signal'_f1m= F1.`l_signal'
	}

	* Table A3: constant only, then adding the market, hml and smb, and mom
	clear matrix
	local col_count = 0
	local l_sample "ym>=ym(1926,10) & ym<=ym(2021,5)"
	local l_rhs_1
	local l_rhs_2 mktrf_f1m
	local l_rhs_3 mktrf_f1m hml_f1m smb_f1m
	local l_rhs_4 mktrf_f1m hml_f1m smb_f1m mom_f1m

	forvalues l_col = 1/4 {
		reg pf_ret `l_rhs_`l_col'' if `l_sample', robust
		local col_count = `col_count' + 1
		estimates store sep_`col_count'	
	}

	* Output file name carries a YYYYMMDD date and an hhmm time stamp.
	* esttab is a user-written command and must be installed separately.
	local l_date : display %tdCYND date(c(current_date), "DMY")
	local l_time = substr(c(current_time), 1, 2) + substr(c(current_time), 4, 2)
	esttab sep_* using "$g_root/Output/USalpha_`l_date'`l_time'.csv", star(* 0.1  ** 0.05 *** 0.01) b(3) t(2) r2 br noomit replace 
}